In [ ]:
import sys, os
import numpy as np
import pandas as pd
import qlib
In [ ]:
from pathlib import Path

# Preferred location of the qlib helper scripts (a local checkout of the repo).
scripts_dir = Path("/data/students/huzb/qlib/scripts")
get_data_script = scripts_dir.joinpath("get_data.py")
print(get_data_script)
# Deliberately no assert here: a missing script is a supported situation that
# the following cell handles by downloading it. An assert would stop the
# notebook before that fallback ever gets a chance to run.
if not get_data_script.exists():
    print(f"{get_data_script} not found; the download fallback will be used")
/data/students/huzb/qlib/scripts/get_data.py
In [ ]:
if not scripts_dir.joinpath("get_data.py").exists():
    # Fallback: fetch get_data.py from the upstream repository into a scratch
    # directory under the user's home and point scripts_dir at it.
    import requests

    scripts_dir = Path("~/tmp/qlib_code/scripts").expanduser().resolve()
    scripts_dir.mkdir(parents=True, exist_ok=True)
    # The timeout keeps the cell from hanging indefinitely on a stalled connection.
    with requests.get(
        "https://raw.githubusercontent.com/microsoft/qlib/main/scripts/get_data.py",
        timeout=30,
    ) as resp:
        # Fail loudly on an HTTP error instead of saving the error page as get_data.py.
        resp.raise_for_status()
        # Only touch the target file once a successful response is in hand.
        scripts_dir.joinpath("get_data.py").write_bytes(resp.content)
In [ ]:
from qlib.constant import REG_CN
from qlib.utils import exists_qlib_data, init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
from qlib.utils import flatten_dict

如果出现 `ModuleNotFoundError: No module named 'qlib.data._libs.rolling'`¶

在 qlib 项目根目录运行 `python setup.py build_ext --inplace` 编译扩展模块后，重新执行上面的导入即可解决。

In [ ]:
# Directory that qlib.init() below is pointed at as its data provider.
provider_uri = "/data/students/huzb/qlib/qlib_data/cn_data"  # target_dir
# Optional bootstrap, currently disabled. When enabled it checks provider_uri
# with exists_qlib_data() and, if nothing is found, imports GetData from
# scripts_dir and downloads the data into provider_uri.
# if not exists_qlib_data(provider_uri):
#     print(f"Qlib data is not found in {provider_uri}")
#     sys.path.append(str(scripts_dir))
#     from get_data import GetData
#     GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
# Initialise qlib with the data directory above and the REG_CN region constant.
qlib.init(provider_uri=provider_uri, region=REG_CN)
[24290:MainThread](2022-10-26 10:47:47,632) INFO - qlib.Initialization - [config.py:413] - default_conf: client.
[24290:MainThread](2022-10-26 10:47:47,637) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[24290:MainThread](2022-10-26 10:47:47,638) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': PosixPath('/data/students/huzb/qlib/qlib_data/cn_data')}
In [ ]:
# Instrument universe passed to the data handler, and the benchmark symbol
# passed to the backtest configuration.
market, benchmark = "csi300", "SH000300"
In [ ]:
from qlib.data import D
from qlib.data.filter import ExpressionDFilter
from qlib.data.filter import NameDFilter
# from dateutil.relativedelta import relativedelta
In [ ]:
# Load daily close prices only to find out which trading dates the local data covers.
instruments = D.instruments(market=market)
fields = ['$close']
f_d = D.features(instruments, fields, start_time='2008-01-01', end_time='2020-08-01', freq='day')

# Work on the unique, sorted trading dates instead of raw row positions: the
# frame holds one row per (instrument, datetime) pair, so "the 8th row from the
# end" is only the 8th-last trading date when the last instrument happens to
# have a complete tail of data. f_d itself is left unmodified.
trade_dates = f_d.index.get_level_values('datetime').unique().sort_values()

# How many trading days before the last available date the test/backtest window ends.
WEEK_OFFSET = 8
if len(trade_dates) < WEEK_OFFSET:
    raise ValueError(
        f"need at least {WEEK_OFFSET} trading dates to derive week_time, got {len(trade_dates)}"
    )

start_time = pd.to_datetime(trade_dates[0])
end_time = pd.to_datetime(trade_dates[-1])
week_time = pd.to_datetime(trade_dates[-WEEK_OFFSET])  # 8th trading date counted from the end
print(start_time, end_time)
print(start_time.strftime('%Y-%m-%d'), end_time.strftime('%Y-%m-%d'), week_time.strftime('%Y-%m-%d'))
2008-01-02 00:00:00 2020-07-31 00:00:00
2008-01-02 2020-07-31 2020-07-22

预测一周后的score¶

In [ ]:
# Experiment name handed to R.start() when the one-week model is trained.
experiment_name = "online_srv"
In [ ]:
###################################
# model / dataset definition (one-week label)
###################################
# End of the fitting window; the "train" segment ends on the same day.
train_end = "2014-12-31"

# Processor lists handed to the data handler via data_handler_config.
infer_processors = [
    {"class": "RobustZScoreNorm", "kwargs": {"fields_group": "feature", "clip_outlier": True}},
    {"class": "Fillna", "kwargs": {"fields_group": "feature"}},
]
learn_processors = [
    {"class": "DropnaLabel"},
    {"class": "CSRankNorm", "kwargs": {"fields_group": "label"}},
]

data_handler_config = dict(
    start_time=start_time,
    end_time=end_time,
    fit_start_time=start_time,
    fit_end_time=train_end,
    instruments=market,
    infer_processors=infer_processors,
    learn_processors=learn_processors,
    # Label: close referenced at offset -8 divided by close at offset -1, minus 1.
    label=["Ref($close, -8) / Ref($close, -1) - 1"],
)

model_config = {
    "class": "TransformerModel",
    "module_path": "qlib.contrib.model.pytorch_transformer",
    "kwargs": {"d_feat": 6, "seed": 0},
}

handler_config = {
    "class": "Huzb360",
    "module_path": "qlib.contrib.data.handler",
    "kwargs": data_handler_config,
}

# The test segment stops at week_time rather than at end_time.
segments = {
    "train": (start_time, train_end),
    "valid": ("2015-01-01", "2016-12-31"),
    "test": ("2017-01-01", week_time),
}

dataset_config = {
    "class": "DatasetH",
    "module_path": "qlib.data.dataset",
    "kwargs": {"handler": handler_config, "segments": segments},
}

task = {"model": model_config, "dataset": dataset_config}

# Instantiate the model and the dataset from their config dictionaries.
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])
[24290:MainThread](2022-10-26 10:47:58,054) INFO - qlib.TransformerModel - [pytorch_transformer.py:65] - Naive Transformer:
batch_size : 2048
device : cuda:0
[24290:MainThread](2022-10-26 10:50:28,548) INFO - qlib.timer - [log.py:117] - Time cost: 147.534s | Loading data Done
/home/huzb/anaconda3/envs/py38/lib/python3.8/site-packages/numpy/lib/nanfunctions.py:1095: RuntimeWarning: All-NaN slice encountered
  result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
[24290:MainThread](2022-10-26 11:24:49,478) INFO - qlib.timer - [log.py:117] - Time cost: 1983.396s | RobustZScoreNorm Done
[24290:MainThread](2022-10-26 11:24:50,423) INFO - qlib.timer - [log.py:117] - Time cost: 0.940s | Fillna Done
[24290:MainThread](2022-10-26 11:24:54,741) INFO - qlib.timer - [log.py:117] - Time cost: 3.710s | DropnaLabel Done
/data/students/huzb/qlib/qlib/data/dataset/processor.py:352: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[cols] = t
[24290:MainThread](2022-10-26 11:31:51,926) INFO - qlib.timer - [log.py:117] - Time cost: 417.183s | CSRankNorm Done
[24290:MainThread](2022-10-26 11:31:51,928) INFO - qlib.timer - [log.py:117] - Time cost: 2483.375s | fit & process data Done
[24290:MainThread](2022-10-26 11:31:51,929) INFO - qlib.timer - [log.py:117] - Time cost: 2630.916s | Init data Done
In [ ]:
# Train inside a recorded experiment run, save the fitted model and generate
# the prediction artifact.

experiment_id = 'cn_backtest'
# experiment_id is not passed to R.start() below; only experiment_name is used.

with R.start(experiment_name=experiment_name):
    R.log_params(**flatten_dict(task))
    model.fit(dataset)
    R.save_objects(trained_model=model)

    # Fetch the active recorder once; keep its id in `rid` for later lookups.
    recorder = R.get_recorder()
    rid = recorder.id

    # prediction
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()
[24290:MainThread](2022-10-26 11:31:52,065) INFO - qlib.workflow - [expm.py:315] - <mlflow.tracking.client.MlflowClient object at 0x7ff0a684db50>
[24290:MainThread](2022-10-26 11:31:52,278) INFO - qlib.workflow - [exp.py:257] - Experiment 1 starts running ...
[24290:MainThread](2022-10-26 11:31:52,983) INFO - qlib.workflow - [recorder.py:295] - Recorder 45841841c62940a3add0b69d9f97b18b starts running under Experiment 1 ...
[24290:MainThread](2022-10-26 11:32:06,622) INFO - qlib.TransformerModel - [pytorch_transformer.py:191] - training...
[24290:MainThread](2022-10-26 11:32:06,624) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch0:
[24290:MainThread](2022-10-26 11:32:06,626) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:32:56,868) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:33:25,843) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -1.000089, valid -0.995834
[24290:MainThread](2022-10-26 11:33:25,854) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch1:
[24290:MainThread](2022-10-26 11:33:25,856) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:34:14,138) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:34:43,211) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.996287, valid -0.994542
[24290:MainThread](2022-10-26 11:34:43,221) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch2:
[24290:MainThread](2022-10-26 11:34:43,222) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:35:31,834) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:36:00,713) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.996294, valid -0.994722
[24290:MainThread](2022-10-26 11:36:00,716) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch3:
[24290:MainThread](2022-10-26 11:36:00,718) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:36:49,131) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:37:18,220) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.997152, valid -0.997195
[24290:MainThread](2022-10-26 11:37:18,223) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch4:
[24290:MainThread](2022-10-26 11:37:18,224) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:38:06,847) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:38:35,763) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.996107, valid -0.994453
[24290:MainThread](2022-10-26 11:38:35,771) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch5:
[24290:MainThread](2022-10-26 11:38:35,772) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:39:24,130) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:39:53,400) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.999094, valid -0.997211
[24290:MainThread](2022-10-26 11:39:53,403) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch6:
[24290:MainThread](2022-10-26 11:39:53,404) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:40:41,887) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:41:11,006) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.995804, valid -0.993554
[24290:MainThread](2022-10-26 11:41:11,020) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch7:
[24290:MainThread](2022-10-26 11:41:11,021) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:41:59,347) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:42:28,429) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.995852, valid -0.993349
[24290:MainThread](2022-10-26 11:42:28,439) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch8:
[24290:MainThread](2022-10-26 11:42:28,440) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:43:17,042) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:43:45,913) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.996506, valid -0.994279
[24290:MainThread](2022-10-26 11:43:45,916) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch9:
[24290:MainThread](2022-10-26 11:43:45,918) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:44:34,195) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:45:03,309) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.997879, valid -0.993217
[24290:MainThread](2022-10-26 11:45:03,317) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch10:
[24290:MainThread](2022-10-26 11:45:03,317) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:45:51,826) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:46:20,865) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.997670, valid -0.997404
[24290:MainThread](2022-10-26 11:46:20,868) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch11:
[24290:MainThread](2022-10-26 11:46:20,869) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:47:09,106) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:47:38,319) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.996070, valid -0.994799
[24290:MainThread](2022-10-26 11:47:38,323) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch12:
[24290:MainThread](2022-10-26 11:47:38,324) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:48:26,795) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:48:55,879) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.995956, valid -0.993564
[24290:MainThread](2022-10-26 11:48:55,882) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch13:
[24290:MainThread](2022-10-26 11:48:55,883) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:49:43,920) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:50:12,973) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.996032, valid -0.993774
[24290:MainThread](2022-10-26 11:50:12,976) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch14:
[24290:MainThread](2022-10-26 11:50:12,978) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 11:51:01,567) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 11:51:30,559) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.996620, valid -0.993523
[24290:MainThread](2022-10-26 11:51:30,562) INFO - qlib.TransformerModel - [pytorch_transformer.py:213] - early stop
[24290:MainThread](2022-10-26 11:51:30,563) INFO - qlib.TransformerModel - [pytorch_transformer.py:216] - best score: -0.993217 @ 9
[24290:MainThread](2022-10-26 11:51:41,450) INFO - qlib.workflow - [record_temp.py:195] - Signal record 'pred.pkl' has been saved as the artifact of the Experiment 1
'The following are prediction results of the TransformerModel model.'
                          score
datetime   instrument          
2017-01-03 SH600000    0.057386
           SH600008    0.050349
           SH600009    0.097617
           SH600010    0.047750
           SH600015    0.020047
[24290:MainThread](2022-10-26 11:51:55,375) INFO - qlib.timer - [log.py:117] - Time cost: 0.000s | waiting `async_log` Done
In [ ]:
# Show the model's predictions for the dataset as the cell output.
model.predict(dataset)
Out[ ]:
datetime    instrument
2017-01-03  SH600000      0.057386
            SH600008      0.050349
            SH600009      0.097617
            SH600010      0.047750
            SH600015      0.020047
                            ...   
2020-07-22  SZ300413     -0.045579
            SZ300433     -0.094042
            SZ300498      0.041083
            SZ300601      0.077929
            SZ300628      0.046742
Length: 259200, dtype: float32

存一周后的预测结果¶

In [ ]:
# Keep the one-week predictions under their own name: `model` and `dataset`
# are rebound later in the notebook when the one-day task is built.
week_score = model.predict(dataset)
In [ ]:
# dataset.handler._infer.iloc[200].to_string()
In [ ]:
# sr.list()
In [ ]:
# sr.load('pred.pkl')
In [ ]:
# sr.load('label.pkl')

预测一天后的score¶

In [ ]:
# Experiment name handed to R.start() when the one-day model is trained.
experiment_name = "online_srv"
In [ ]:
###################################
# model / dataset definition (one-day label)
###################################
# End of the fitting window; the "train" segment ends on the same day.
train_end = "2014-12-31"

# Processor lists handed to the data handler via data_handler_config.
infer_processors = [
    {"class": "RobustZScoreNorm", "kwargs": {"fields_group": "feature", "clip_outlier": True}},
    {"class": "Fillna", "kwargs": {"fields_group": "feature"}},
]
learn_processors = [
    {"class": "DropnaLabel"},
    {"class": "CSRankNorm", "kwargs": {"fields_group": "label"}},
]

data_handler_config = dict(
    start_time=start_time,
    end_time=end_time,
    fit_start_time=start_time,
    fit_end_time=train_end,
    instruments=market,
    infer_processors=infer_processors,
    learn_processors=learn_processors,
    # Label: close referenced at offset -2 divided by close at offset -1, minus 1.
    label=["Ref($close, -2) / Ref($close, -1) - 1"],
)

model_config = {
    "class": "TransformerModel",
    "module_path": "qlib.contrib.model.pytorch_transformer",
    "kwargs": {"d_feat": 6, "seed": 0},
}

handler_config = {
    "class": "Alpha360",
    "module_path": "qlib.contrib.data.handler",
    "kwargs": data_handler_config,
}

# The test segment stops at week_time rather than at end_time.
segments = {
    "train": (start_time, train_end),
    "valid": ("2015-01-01", "2016-12-31"),
    "test": ("2017-01-01", week_time),
}

dataset_config = {
    "class": "DatasetH",
    "module_path": "qlib.data.dataset",
    "kwargs": {"handler": handler_config, "segments": segments},
}

task = {"model": model_config, "dataset": dataset_config}

# Instantiate the model and the dataset from their config dictionaries.
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])
[24290:MainThread](2022-10-26 11:52:17,302) INFO - qlib.TransformerModel - [pytorch_transformer.py:65] - Naive Transformer:
batch_size : 2048
device : cuda:0
[24290:MainThread](2022-10-26 11:58:08,720) INFO - qlib.timer - [log.py:117] - Time cost: 351.309s | Loading data Done
/home/huzb/anaconda3/envs/py38/lib/python3.8/site-packages/numpy/lib/nanfunctions.py:1095: RuntimeWarning: All-NaN slice encountered
  result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
[24290:MainThread](2022-10-26 16:07:30,674) INFO - qlib.timer - [log.py:117] - Time cost: 14595.104s | RobustZScoreNorm Done
[24290:MainThread](2022-10-26 16:07:33,243) INFO - qlib.timer - [log.py:117] - Time cost: 1.606s | Fillna Done
[24290:MainThread](2022-10-26 16:07:44,289) INFO - qlib.timer - [log.py:117] - Time cost: 10.422s | DropnaLabel Done
/data/students/huzb/qlib/qlib/data/dataset/processor.py:352: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[cols] = t
[24290:MainThread](2022-10-26 16:09:26,816) INFO - qlib.timer - [log.py:117] - Time cost: 102.524s | CSRankNorm Done
[24290:MainThread](2022-10-26 16:09:26,820) INFO - qlib.timer - [log.py:117] - Time cost: 15078.098s | fit & process data Done
[24290:MainThread](2022-10-26 16:09:26,822) INFO - qlib.timer - [log.py:117] - Time cost: 15429.411s | Init data Done
In [ ]:
# Train inside a recorded experiment run, save the fitted model and generate
# the prediction artifact.

experiment_id = 'cn_backtest'
# experiment_id is not passed to R.start() below; only experiment_name is used.

with R.start(experiment_name=experiment_name):
    R.log_params(**flatten_dict(task))
    model.fit(dataset)
    R.save_objects(trained_model=model)

    # Fetch the active recorder once; `rid` is what the backtest cell uses to
    # find this run again.
    recorder = R.get_recorder()
    rid = recorder.id

    # prediction
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()
[24290:MainThread](2022-10-26 16:09:59,173) INFO - qlib.workflow - [expm.py:315] - <mlflow.tracking.client.MlflowClient object at 0x7ff0a69131c0>
[24290:MainThread](2022-10-26 16:09:59,401) INFO - qlib.workflow - [exp.py:257] - Experiment 1 starts running ...
[24290:MainThread](2022-10-26 16:09:59,483) INFO - qlib.workflow - [recorder.py:295] - Recorder d856a3edbb684f119bdac0e6965dd60e starts running under Experiment 1 ...
[24290:MainThread](2022-10-26 16:13:19,884) INFO - qlib.TransformerModel - [pytorch_transformer.py:191] - training...
[24290:MainThread](2022-10-26 16:13:19,887) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch0:
[24290:MainThread](2022-10-26 16:13:19,888) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 16:13:42,443) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 16:13:54,103) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993972, valid -0.994532
[24290:MainThread](2022-10-26 16:13:54,124) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch1:
[24290:MainThread](2022-10-26 16:13:54,125) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 16:14:15,373) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 16:14:27,166) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993800, valid -0.995058
[24290:MainThread](2022-10-26 16:14:27,169) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch2:
[24290:MainThread](2022-10-26 16:14:27,170) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 16:14:48,690) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 16:15:00,500) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993447, valid -0.995081
[24290:MainThread](2022-10-26 16:15:00,503) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch3:
[24290:MainThread](2022-10-26 16:15:00,505) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 16:15:22,039) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 16:15:33,890) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.994193, valid -0.996027
[24290:MainThread](2022-10-26 16:15:33,893) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch4:
[24290:MainThread](2022-10-26 16:15:33,895) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 16:15:55,539) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 16:16:07,389) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993148, valid -0.993657
[24290:MainThread](2022-10-26 16:16:07,406) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch5:
[24290:MainThread](2022-10-26 16:16:07,407) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 16:16:28,898) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 16:16:40,706) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993291, valid -0.995148
[24290:MainThread](2022-10-26 16:16:40,710) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch6:
[24290:MainThread](2022-10-26 16:16:40,711) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 16:17:02,211) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 16:17:14,065) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993307, valid -0.995193
[24290:MainThread](2022-10-26 16:17:14,068) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch7:
[24290:MainThread](2022-10-26 16:17:14,070) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 16:17:35,737) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 16:17:47,677) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993478, valid -0.994461
[24290:MainThread](2022-10-26 16:17:47,680) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch8:
[24290:MainThread](2022-10-26 16:17:47,681) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 16:18:09,392) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 16:18:21,256) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993732, valid -0.994290
[24290:MainThread](2022-10-26 16:18:21,258) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch9:
[24290:MainThread](2022-10-26 16:18:21,259) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training...
[24290:MainThread](2022-10-26 16:18:42,899) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating...
[24290:MainThread](2022-10-26 16:18:54,774) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.994229, valid -0.996022
[24290:MainThread](2022-10-26 16:18:54,777) INFO - qlib.TransformerModel - [pytorch_transformer.py:213] - early stop
[24290:MainThread](2022-10-26 16:18:54,778) INFO - qlib.TransformerModel - [pytorch_transformer.py:216] - best score: -0.993657 @ 4
[24290:MainThread](2022-10-26 16:19:00,463) INFO - qlib.workflow - [record_temp.py:195] - Signal record 'pred.pkl' has been saved as the artifact of the Experiment 1
'The following are prediction results of the TransformerModel model.'
                          score
datetime   instrument          
2017-01-03 SH600000    0.029039
           SH600008    0.014598
           SH600009    0.046318
           SH600010    0.037156
           SH600015   -0.001555
[24290:MainThread](2022-10-26 16:19:01,691) INFO - qlib.timer - [log.py:117] - Time cost: 0.000s | waiting `async_log` Done
In [ ]:
# Show the predictions of the model trained on the one-day label as the cell output.
model.predict(dataset)
Out[ ]:
datetime    instrument
2017-01-03  SH600000      0.029039
            SH600008      0.014598
            SH600009      0.046318
            SH600010      0.037156
            SH600015     -0.001555
                            ...   
2020-07-22  SZ300413     -0.102836
            SZ300433     -0.064527
            SZ300498      0.050756
            SZ300601     -0.184252
            SZ300628     -0.031806
Length: 259200, dtype: float32

WeekTopkDropoutStrategy¶

In [ ]:
# Show the one-week predictions captured earlier; they are passed to the
# strategy configuration in the next cell.
week_score
Out[ ]:
datetime    instrument
2017-01-03  SH600000      0.057386
            SH600008      0.050349
            SH600009      0.097617
            SH600010      0.047750
            SH600015      0.020047
                            ...   
2020-07-22  SZ300413     -0.045579
            SZ300433     -0.094042
            SZ300498      0.041083
            SZ300601      0.077929
            SZ300628      0.046742
Length: 259200, dtype: float32
In [ ]:
###################################
# prediction, backtest & analysis
###################################
executor_config = {
    "class": "SimulatorExecutor",
    "module_path": "qlib.backtest.executor",
    "kwargs": {"time_per_step": "day", "generate_portfolio_metrics": True},
}

# NOTE: `model` here is the in-memory object at the time this dict is built,
# i.e. before it is rebound to the reloaded model inside the run below.
strategy_config = {
    "class": "WeekTopkDropoutStrategy",
    "module_path": "qlib.contrib.strategy.signal_strategy",
    "kwargs": {
        "model": model,
        "dataset": dataset,
        "topk": 50,
        "n_drop": 5,
        "week_score" : week_score,
    },
}

exchange_kwargs = {
    "freq": "day",
    "limit_threshold": 0.095,
    "deal_price": "close",
    "open_cost": 0.0005,
    "close_cost": 0.0015,
    "min_cost": 5,
}

backtest_config = {
    "start_time": "2017-01-01",
    "end_time": week_time,
    "account": 100000000,
    "benchmark": benchmark,
    "exchange_kwargs": exchange_kwargs,
}

port_analysis_config = {
    "executor": executor_config,
    "strategy": strategy_config,
    "backtest": backtest_config,
}

# backtest and analysis
with R.start(experiment_name="backtest_analysis"):
    # Reload the trained model from the training run identified by `rid`.
    model = R.get_recorder(recorder_id=rid, experiment_name="online_srv").load_object("trained_model")

    # prediction, recorded under the run started by this `with` block
    recorder = R.get_recorder()
    ba_rid = recorder.id
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()

    # backtest & analysis
    par = PortAnaRecord(recorder, port_analysis_config, "day")
    par.generate()
[24290:MainThread](2022-10-26 16:19:31,255) INFO - qlib.workflow - [expm.py:315] - <mlflow.tracking.client.MlflowClient object at 0x7ff11dcd85b0>
[24290:MainThread](2022-10-26 16:19:31,260) INFO - qlib.workflow - [exp.py:257] - Experiment 2 starts running ...
[24290:MainThread](2022-10-26 16:19:31,295) INFO - qlib.workflow - [recorder.py:295] - Recorder 58f3b41a5fe847678105d495c60cce18 starts running under Experiment 2 ...
[24290:MainThread](2022-10-26 16:19:37,353) INFO - qlib.workflow - [record_temp.py:195] - Signal record 'pred.pkl' has been saved as the artifact of the Experiment 2
'The following are prediction results of the TransformerModel model.'
                          score
datetime   instrument          
2017-01-03 SH600000    0.029039
           SH600008    0.014598
           SH600009    0.046318
           SH600010    0.037156
           SH600015   -0.001555
[24290:MainThread](2022-10-26 16:19:37,589) INFO - qlib.backtest caller - [__init__.py:94] - Create new exchange
[24290:MainThread](2022-10-26 16:19:52,280) WARNING - qlib.online operator - [exchange.py:216] - factor.day.bin file not exists or factor contains `nan`. Order using adjusted_price.
[24290:MainThread](2022-10-26 16:19:52,283) WARNING - qlib.online operator - [exchange.py:218] - trade unit 100 is not supported in adjusted_price mode.
/data/students/huzb/qlib/qlib/contrib/strategy/signal_strategy.py:98: DeprecationWarning: `model` `dataset` is deprecated; use `signal`.
  warnings.warn("`model` `dataset` is deprecated; use `signal`.", DeprecationWarning)
[24290:MainThread](2022-10-26 16:20:02,190) WARNING - qlib.data - [data.py:662] - load calendar error: freq=day, future=True; return current calendar!
[24290:MainThread](2022-10-26 16:20:02,191) WARNING - qlib.data - [data.py:665] - You can get future calendar by referring to the following document: https://github.com/microsoft/qlib/blob/main/scripts/data_collector/contrib/README.md
[24290:MainThread](2022-10-26 16:20:02,210) WARNING - qlib.BaseExecutor - [executor.py:121] - `common_infra` is not set for <qlib.backtest.executor.SimulatorExecutor object at 0x7ff0a673b970>
backtest loop:   0%|          | 0/864 [00:00<?, ?it/s]
/data/students/huzb/qlib/qlib/utils/index_data.py:482: RuntimeWarning: Mean of empty slice
  return np.nanmean(self.data)
/data/students/huzb/qlib/qlib/utils/index_data.py:482: RuntimeWarning: Mean of empty slice
  return np.nanmean(self.data)
/data/students/huzb/qlib/qlib/utils/index_data.py:482: RuntimeWarning: Mean of empty slice
  return np.nanmean(self.data)
/data/students/huzb/qlib/qlib/utils/index_data.py:482: RuntimeWarning: Mean of empty slice
  return np.nanmean(self.data)
/data/students/huzb/qlib/qlib/utils/index_data.py:482: RuntimeWarning: Mean of empty slice
  return np.nanmean(self.data)
[24290:MainThread](2022-10-26 16:20:21,597) INFO - qlib.workflow - [record_temp.py:500] - Portfolio analysis record 'port_analysis_1day.pkl' has been saved as the artifact of the Experiment 2
[24290:MainThread](2022-10-26 16:20:21,616) INFO - qlib.workflow - [record_temp.py:525] - Indicator analysis record 'indicator_analysis_1day.pkl' has been saved as the artifact of the Experiment 2
[24290:MainThread](2022-10-26 16:20:21,667) INFO - qlib.timer - [log.py:117] - Time cost: 0.009s | waiting `async_log` Done
'The following are analysis results of benchmark return(1day).'
                       risk
mean               0.000484
std                0.012217
annualized_return  0.115237
information_ratio  0.611431
max_drawdown      -0.370479
'The following are analysis results of the excess return without cost(1day).'
                       risk
mean               0.000049
std                0.004999
annualized_return  0.011584
information_ratio  0.150214
max_drawdown      -0.133798
'The following are analysis results of the excess return with cost(1day).'
                       risk
mean              -0.000111
std                0.004998
annualized_return -0.026397
information_ratio -0.342376
max_drawdown      -0.163471
'The following are analysis results of indicators(1day).'
     value
ffr    1.0
pa     0.0
pos    0.0
In [ ]:
from qlib.contrib.report import analysis_model, analysis_position
from qlib.data import D

# Fetch the recorder of the backtest run by its id and echo its metadata.
# NOTE(review): the backtest log reports its artifacts under "Experiment 2",
# while the recorder printed here reports experiment_id '1' -- confirm that
# "online_srv" is really the experiment `ba_rid` was created in.
recorder = R.get_recorder(experiment_name="online_srv", recorder_id=ba_rid)
print(recorder)

# Stored predictions, plus the values of the 'datetime' level of their index.
pred_df = recorder.load_object("pred.pkl")
pred_df_dates = pred_df.index.get_level_values(level="datetime")

# Artifacts stored under the recorder's "portfolio_analysis" folder.
analysis_df = recorder.load_object("portfolio_analysis/port_analysis_1day.pkl")
positions = recorder.load_object("portfolio_analysis/positions_normal_1day.pkl")
report_normal_df = recorder.load_object("portfolio_analysis/report_normal_1day.pkl")
{'class': 'Recorder', 'id': '58f3b41a5fe847678105d495c60cce18', 'name': 'mlflow_recorder', 'experiment_id': '1', 'start_time': '2022-10-26 16:19:31', 'end_time': '2022-10-26 16:20:21', 'status': 'FINISHED'}
In [ ]:
# Display the backtest report frame loaded from "portfolio_analysis/report_normal_1day.pkl".
report_normal_df
Out[ ]:
account return total_turnover turnover total_cost cost value cash bench
datetime
2017-01-03 1.000000e+08 0.000000 0.000000e+00 0.000000 0.000000e+00 0.000000 0.000000e+00 1.000000e+08 0.009713
2017-01-04 9.995250e+07 0.000000 9.500000e+07 0.950000 4.750000e+04 0.000475 9.500000e+07 4.952500e+06 0.007803
2017-01-05 9.988019e+07 -0.000514 1.180583e+08 0.230692 6.844800e+04 0.000210 9.916914e+07 7.110426e+05 -0.000154
2017-01-06 9.916268e+07 -0.006976 1.388923e+08 0.208591 8.921034e+04 0.000208 9.861588e+07 5.467968e+05 -0.005974
2017-01-09 9.975620e+07 0.006202 1.603424e+08 0.216312 1.106768e+05 0.000216 9.919795e+07 5.582540e+05 0.004848
... ... ... ... ... ... ... ... ... ...
2020-07-16 1.254517e+08 -0.034337 1.430774e+10 0.204871 1.426522e+07 0.000204 1.247551e+08 6.966690e+05 -0.048102
2020-07-17 1.256578e+08 0.001842 1.433145e+10 0.188985 1.429026e+07 0.000200 1.223377e+08 3.320025e+06 0.006299
2020-07-20 1.298532e+08 0.033570 1.435566e+10 0.192686 1.431317e+07 0.000182 1.291544e+08 6.988431e+05 0.029837
2020-07-21 1.293579e+08 -0.003605 1.438282e+10 0.209116 1.434034e+07 0.000209 1.286510e+08 7.068842e+05 0.002295
2020-07-22 1.302124e+08 0.006815 1.440995e+10 0.209777 1.436749e+07 0.000210 1.295057e+08 7.066271e+05 0.004990

864 rows × 9 columns

In [ ]:
# Display the risk-analysis frame loaded from "portfolio_analysis/port_analysis_1day.pkl".
analysis_df
Out[ ]:
risk
excess_return_without_cost mean 0.000049
std 0.004999
annualized_return 0.011584
information_ratio 0.150214
max_drawdown -0.133798
excess_return_with_cost mean -0.000111
std 0.004998
annualized_return -0.026397
information_ratio -0.342376
max_drawdown -0.163471
In [ ]:
# Render qlib's report graph for the backtest report frame.
analysis_position.report_graph(report_normal_df)

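TopkDropoutStrategy

Re-run prediction, backtest and analysis with qlib's `TopkDropoutStrategy` (`topk=50`, `n_drop=5`), recording the results under the "backtest_analysis" experiment.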

In [ ]:
###################################
# prediction, backtest & analysis
###################################
# Load the persisted model BEFORE building the config. The strategy kwargs
# below capture `model` at dict-construction time; loading it first guarantees
# that the strategy and SignalRecord operate on the same object instead of on
# whatever `model` happened to be left in the kernel.
train_recorder = R.get_recorder(recorder_id=rid, experiment_name="online_srv")
model = train_recorder.load_object("trained_model")

port_analysis_config = {
    # Executor that runs the backtest one "day" step at a time and produces
    # portfolio metrics.
    "executor": {
        "class": "SimulatorExecutor",
        "module_path": "qlib.backtest.executor",
        "kwargs": {
            "time_per_step": "day",
            "generate_portfolio_metrics": True,
        },
    },
    # Signal-driven strategy fed by the model/dataset pair.
    "strategy": {
        "class": "TopkDropoutStrategy",
        "module_path": "qlib.contrib.strategy.signal_strategy",
        "kwargs": {
            "model": model,
            "dataset": dataset,
            "topk": 50,
            "n_drop": 5,
        },
    },
    # Backtest window, starting capital, benchmark and exchange settings.
    "backtest": {
        "start_time": "2017-01-01",
        "end_time": week_time,
        "account": 100000000,
        "benchmark": benchmark,
        "exchange_kwargs": {
            "freq": "day",
            "limit_threshold": 0.095,
            "deal_price": "close",
            "open_cost": 0.0005,
            "close_cost": 0.0015,
            "min_cost": 5,
        },
    },
}

# backtest and analysis
with R.start(experiment_name="backtest_analysis"):
    # prediction: store the model's signal in the recorder of this new run;
    # keep the run id so later cells can reload the artifacts.
    recorder = R.get_recorder()
    ba_rid = recorder.id
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()

    # backtest & analysis
    par = PortAnaRecord(recorder, port_analysis_config, "day")
    par.generate()
In [ ]:
from qlib.contrib.report import analysis_model, analysis_position
from qlib.data import D

# `ba_rid` identifies the run started under
# R.start(experiment_name="backtest_analysis"), so the recorder must be looked
# up in that experiment. Querying "online_srv" labels the returned recorder
# with the wrong experiment and only works if the backend resolves run ids
# without checking the experiment.
recorder = R.get_recorder(recorder_id=ba_rid, experiment_name="backtest_analysis")
print(recorder)

# Stored predictions, plus the values of the 'datetime' level of their index.
pred_df = recorder.load_object("pred.pkl")
pred_df_dates = pred_df.index.get_level_values(level='datetime')

# Artifacts stored under the recorder's "portfolio_analysis" folder.
report_normal_df = recorder.load_object("portfolio_analysis/report_normal_1day.pkl")
positions = recorder.load_object("portfolio_analysis/positions_normal_1day.pkl")
analysis_df = recorder.load_object("portfolio_analysis/port_analysis_1day.pkl")
In [ ]:
# Display the backtest report frame loaded from "portfolio_analysis/report_normal_1day.pkl".
report_normal_df
In [ ]:
# Render qlib's report graph for the backtest report frame.
analysis_position.report_graph(report_normal_df)
In [ ]:
# Display the risk-analysis frame loaded from "portfolio_analysis/port_analysis_1day.pkl".
analysis_df
In [ ]: